How do teachers process technology-based formative assessment results in their daily practice? Results from process mining of think-aloud data

Reproducible documentation of data analysis

Author

First author, second author, last author

library(readxl)
library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(skimr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr   1.1.4     ✔ readr   2.1.5
✔ forcats 1.0.0     ✔ stringr 1.5.1
✔ ggplot2 3.5.0     ✔ tibble  3.2.1
✔ purrr   1.0.2     ✔ tidyr   1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(psych)

Attaching package: 'psych'

The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(DescTools)

Attaching package: 'DescTools'

The following objects are masked from 'package:psych':

    AUC, ICC, SD
library(lpSolve)
library(irr)
library(viridisLite)
library(viridis)
library(devtools)
Loading required package: usethis
library(hrbrthemes)
library(ggalt)
Registered S3 methods overwritten by 'ggalt':
  method                  from   
  grid.draw.absoluteGrob  ggplot2
  grobHeight.absoluteGrob ggplot2
  grobWidth.absoluteGrob  ggplot2
  grobX.absoluteGrob      ggplot2
  grobY.absoluteGrob      ggplot2
library(heuristicsmineR)
library(scales)

Attaching package: 'scales'

The following object is masked from 'package:viridis':

    viridis_pal

The following objects are masked from 'package:psych':

    alpha, rescale

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor
library(bupaR)

Attaching package: 'bupaR'

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:utils':

    timestamp
library(gghalves)
library(openxlsx)
library(distributional)
library(ggdist)

Attaching package: 'ggdist'

The following object is masked from 'package:DescTools':

    Mode
library(cluster)
library(ComplexHeatmap)
Loading required package: grid
========================================
ComplexHeatmap version 2.15.4
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference

If you use it in published research, please cite either one:
- Gu, Z. Complex Heatmap Visualization. iMeta 2022.
- Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
    genomic data. Bioinformatics 2016.


The new InteractiveComplexHeatmap package can directly export static 
complex heatmaps into an interactive Shiny app with zero effort. Have a try!

This message can be suppressed by:
  suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
library(circlize)
========================================
circlize version 0.4.16
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))
========================================
library(sjmisc)

Attaching package: 'sjmisc'

The following object is masked from 'package:DescTools':

    %nin%

The following object is masked from 'package:purrr':

    is_empty

The following object is masked from 'package:tidyr':

    replace_na

The following object is masked from 'package:tibble':

    add_case

The following object is masked from 'package:skimr':

    to_long
library(partitionComparison)

Attaching package: 'partitionComparison'

The following object is masked from 'package:DescTools':

    N

Participants

demographics <- read_xlsx("data/data_demographics.xlsx")

Age

describe(demographics$age)
   vars  n  mean    sd median trimmed   mad min max range skew kurtosis   se
X1    1 48 44.08 11.57     43   43.92 17.79  26  64    38 0.01    -1.31 1.67

Teaching Experience

describe(demographics$exp)
   vars  n  mean    sd median trimmed   mad min max range skew kurtosis   se
X1    1 48 16.54 10.54     17    15.9 11.86   1  40    39 0.41    -0.77 1.52

Gender 

1 = female, 0 = male

# Gender distribution (1 = female, 0 = male): count and percentage per group.
# The denominator is derived from the data instead of the hard-coded sample
# size (48), so the chunk stays correct if the sample changes.
demographics %>%
  count(gender, name = "gender_count") %>%
  mutate(gender_Per = gender_count / sum(gender_count) * 100)
# A tibble: 2 × 3
  gender gender_count gender_Per
   <dbl>        <int>      <dbl>
1      0           27       56.2
2      1           21       43.8

Education level 

p = primary education, s = secondary education

# Education-level distribution (p = primary, s = secondary): count and
# percentage per group. The denominator is derived from the data instead of
# the hard-coded sample size (48).
demographics %>%
  count(prim_sec, name = "edlevel_count") %>%
  mutate(edlevel_Per = edlevel_count / sum(edlevel_count) * 100)
# A tibble: 2 × 3
  prim_sec edlevel_count edlevel_Per
  <chr>            <int>       <dbl>
1 p                   15        31.2
2 s                   33        68.8

Subjects

# Number and percentage of teachers teaching each subject area. The subject
# columns are binary indicators and teachers can teach several subjects, so
# the percentages need not sum to 100. The denominator is taken from the
# data rather than hard-coding 48.
n_teachers <- nrow(demographics)
demographics %>%
  summarise(stem = sum(stem),
            stem_Per = stem / n_teachers * 100,
            lang = sum(lang),
            lang_Per = lang / n_teachers * 100,
            social = sum(social),
            social_Per = social / n_teachers * 100,
            art = sum(art),
            art_Per = art / n_teachers * 100)
# A tibble: 1 × 8
   stem stem_Per  lang lang_Per social social_Per   art art_Per
  <dbl>    <dbl> <dbl>    <dbl>  <dbl>      <dbl> <dbl>   <dbl>
1    43     89.6    45     93.8     38       79.2    24      50

Analyzing reliability of coding procedure

Main steps of processing and graph types (coding run 1)

codings_event_run1 <- read_excel("data/codings for reliability/codings_event_run1.xlsx")

Wrangling: from event to turn data

# Reshape event-level codings (one row per coded segment with start/end
# timestamps) into second-by-second "turn" data: one row per second, teacher
# (pid) and coder, with one binary indicator column per code.
codings_turn_run1 <- codings_event_run1%>%
      # expand every event into the sequence of seconds it covers
      mutate(time = map2(start, end, seq, by = "1 sec"))%>%
  unnest(cols = time)%>%
  select(-start, -end, -turnID)%>%
  group_by(time, pid, coder)%>%
  # collapse codes that overlap within a second into one comma-separated string
  summarise(code = toString(code))%>%
  ungroup()%>%
  # one 0/1 column per code value (prefixed "code_"); fill = 0 marks absent codes
  splitstackshape::cSplit_e("code", sep = ",", type = "character", drop = F, fill = 0)%>%
  as_tibble()%>%
  arrange(pid)%>%
  # keep only the last character of the coder label — presumably reduces
  # labels like "coder a" to "a"; confirm against the raw data
  mutate(coder = str_sub(coder, -1))
`summarise()` has grouped output by 'time', 'pid'. You can override using the
`.groups` argument.

Main steps

nr: noticing results

# nr (noticing results): Krippendorff's alpha between coders a and b on the
# second-by-second turn data. pivot_wider() replaces the superseded spread(),
# and values_fill = 0 replaces the explicit is.na() -> 0 recoding: seconds
# present for only one coder count as "not coded" (0) for the other.
codings_turn_run1%>%
  select(coder, pid, code_nr, time)%>%
  pivot_wider(names_from = coder, values_from = code_nr, values_fill = 0)%>%
  select(a, b)%>%
  t()%>%
  kripp.alpha(method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.622 

ae: analyzing errors

# ae (analyzing errors): Krippendorff's alpha between coders a and b on the
# second-by-second turn data. pivot_wider() replaces the superseded spread(),
# and values_fill = 0 replaces the explicit is.na() -> 0 recoding: seconds
# present for only one coder count as "not coded" (0) for the other.
codings_turn_run1%>%
  select(coder, pid, code_ae, time)%>%
  pivot_wider(names_from = coder, values_from = code_ae, values_fill = 0)%>%
  select(a, b)%>%
  t()%>%
  kripp.alpha(method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.581 

pp: comparing with personal perspective

codings_turn_run1%>%
  select(coder, pid, code_pp, time)%>%
  spread(coder, code_pp)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.635 

ii: constructing instructional implications

codings_turn_run1%>%
  select(coder, pid, code_ii, time)%>%
  spread(coder, code_ii)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.704 

Graph types

g1: graph type 1

codings_turn_run1%>%
  select(coder, pid, code_g1, time)%>%
  spread(coder, code_g1)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.85 

g2: graph type 2

codings_turn_run1%>%
  select(coder, pid, code_g2, time)%>%
  spread(coder, code_g2)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.88 

g3: graph type 3

codings_turn_run1%>%
  select(coder, pid, code_g3, time)%>%
  spread(coder, code_g3)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.949 

g4: graph type 4

codings_turn_run1%>%
  select(coder, pid, code_g4, time)%>%
  spread(coder, code_g4)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.899 

g5: graph type 5

codings_turn_run1%>%
  select(coder, pid, code_g5, time)%>%
  spread(coder, code_g5)%>%
  mutate(a = ifelse(is.na(a), 0, a),
         b = ifelse(is.na(b), 0, b))%>%
  select(a, b)%>%
  t(.)%>%
  kripp.alpha(., method = "nominal")
 Krippendorff's alpha

 Subjects = 28675 
   Raters = 2 
    alpha = 0.804 

Noticing specific aspects (coding run 2)

To calculate the intercoder reliability of coding run 2, we did not use Krippendorff's alpha. Several categories were coded very seldom (i.e., low prevalence), and in these cases the coefficient produced implausible values. Therefore, we used Cohen's kappa; if all raters or one rater coded 0 in one category for a teacher's think-aloud, the percentage of agreement was calculated instead, because Cohen's kappa could not be computed in these cases. For practical reasons, two different teams rated the videos (team 1: coders c and d, 6 videos; team 2: coders d and e, the rest).

Intercoder reliability of coders c and d

codings_run2_coder_c_d <- read_excel("data/codings for reliability/codings_binary_run2_coder_c_d.xlsx")

as: assessment finished

# Percentage agreement between coders c and d for "assessment finished".
# mean(agreement) generalizes the original hard-coded division by 6 (the
# number of double-coded videos for this team) and yields the same result.
codings_run2_coder_c_d%>%
  select(as_coder_c, as_coder_d)%>%
  mutate(agreement = ifelse(as_coder_c == as_coder_d, 1, 0))%>%
  summarise(agreement_per = mean(agreement) * 100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
# Cross-tabulate the two coders' "assessment finished" codings and compute
# Cohen's kappa on the resulting 2x2 contingency table.
as_coder_c_d <- codings_run2_coder_c_d %>%
  select(as_coder_c, as_coder_d)

xtab_as_cd <- xtabs(~ as_coder_c + as_coder_d, data = as_coder_c_d)

CohenKappa(xtab_as_cd)
[1] 0.6666667

rn: match individual result to student name

codings_run2_coder_c_d%>%
  select(rn_coder_c, rn_coder_d)%>%
  mutate(agreement = ifelse(rn_coder_c == rn_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          66.7
rn_coder_c_d <- codings_run2_coder_c_d%>%
  select(rn_coder_c, rn_coder_d)

xtab_rn_cd <- xtabs (~ rn_coder_c_d$rn_coder_c + rn_coder_c_d$rn_coder_d)
  
CohenKappa(xtab_rn_cd)
[1] 0.25

me: mean

codings_run2_coder_c_d%>%
  select(me_coder_c, me_coder_d)%>%
  mutate(agreement = ifelse(me_coder_c == me_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
me_coder_c_d <- codings_run2_coder_c_d%>%
  select(me_coder_c, me_coder_d)

xtab_me_cd <- xtabs (~ me_coder_c_d$me_coder_c + me_coder_c_d$me_coder_d)
  
CohenKappa(xtab_me_cd)
[1] 1

bg: best group

codings_run2_coder_c_d%>%
  select(bg_coder_c, bg_coder_d)%>%
  mutate(agreement = ifelse(bg_coder_c == bg_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
bg_coder_c_d <- codings_run2_coder_c_d%>%
  select(bg_coder_c, bg_coder_d)

xtab_bg_cd <- xtabs (~ bg_coder_c_d$bg_coder_c + bg_coder_c_d$bg_coder_d)
  
CohenKappa(xtab_bg_cd)
[1] 0.6666667

mg: middle group

codings_run2_coder_c_d%>%
  select(mg_coder_c, mg_coder_d)%>%
  mutate(agreement = ifelse(mg_coder_c == mg_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
mg_coder_c_d <- codings_run2_coder_c_d%>%
  select(mg_coder_c, mg_coder_d)

xtab_mg_cd <- xtabs (~ mg_coder_c_d$mg_coder_c + mg_coder_c_d$mg_coder_d)
  
#CohenKappa(xtab_mg_cd)

wg: weakest group

codings_run2_coder_c_d%>%
  select(wg_coder_c, wg_coder_d)%>%
  mutate(agreement = ifelse(wg_coder_c == wg_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
wg_coder_c_d <- codings_run2_coder_c_d%>%
  select(wg_coder_c, wg_coder_d)

xtab_wg_cd <- xtabs (~ wg_coder_c_d$wg_coder_c + wg_coder_c_d$wg_coder_d)
  
CohenKappa(xtab_wg_cd)
[1] 1

ou: outlier

codings_run2_coder_c_d%>%
  select(ou_coder_c, ou_coder_d)%>%
  mutate(agreement = ifelse(ou_coder_c == ou_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
ou_coder_c_d <- codings_run2_coder_c_d%>%
  select(ou_coder_c, ou_coder_d)

xtab_ou_cd <- xtabs (~ ou_coder_c_d$ou_coder_c + ou_coder_c_d$ou_coder_d)
  
#CohenKappa(xtab_ou_cd)

bs: best student

codings_run2_coder_c_d%>%
  select(bs_coder_c, bs_coder_d)%>%
  mutate(agreement = ifelse(bs_coder_c == bs_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
bs_coder_c_d <- codings_run2_coder_c_d%>%
  select(bs_coder_c, bs_coder_d)

xtab_bs_cd <- xtabs (~ bs_coder_c_d$bs_coder_c + bs_coder_c_d$bs_coder_d)
  
CohenKappa(xtab_bs_cd)
[1] 1

ws: weakest student

codings_run2_coder_c_d%>%
  select(ws_coder_c, ws_coder_d)%>%
  mutate(agreement = ifelse(ws_coder_c == ws_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
ws_coder_c_d <- codings_run2_coder_c_d%>%
  select(ws_coder_c, ws_coder_d)

xtab_ws_cd <- xtabs (~ ws_coder_c_d$ws_coder_c + ws_coder_c_d$ws_coder_d)
  
CohenKappa(xtab_ws_cd)
[1] 0.5714286

dg: dispersion general

codings_run2_coder_c_d%>%
  select(dg_coder_c, dg_coder_d)%>%
  mutate(agreement = ifelse(dg_coder_c == dg_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
dg_coder_c_d <- codings_run2_coder_c_d%>%
  select(dg_coder_c, dg_coder_d)

xtab_dg_cd <- xtabs (~ dg_coder_c_d$dg_coder_c + dg_coder_c_d$dg_coder_d)
  
CohenKappa(xtab_dg_cd)
[1] 1

ra: range

codings_run2_coder_c_d%>%
  select(ra_coder_c, ra_coder_d)%>%
  mutate(agreement = ifelse(ra_coder_c == ra_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
ra_coder_c_d <- codings_run2_coder_c_d%>%
  select(ra_coder_c, ra_coder_d)

xtab_ra_cd <- xtabs (~ ra_coder_c_d$ra_coder_c + ra_coder_c_d$ra_coder_d)
  
CohenKappa(xtab_ra_cd)
[1] 0.5714286

dm: dispersion mean average

codings_run2_coder_c_d%>%
  select(dm_coder_c, dm_coder_d)%>%
  mutate(agreement = ifelse(dm_coder_c == dm_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
dm_coder_c_d <- codings_run2_coder_c_d%>%
  select(dm_coder_c, dm_coder_d)

xtab_dm_cd <- xtabs (~ dm_coder_c_d$dm_coder_c + dm_coder_c_d$dm_coder_d)
  
CohenKappa(xtab_dm_cd)
[1] NaN

rs: result individual student

codings_run2_coder_c_d%>%
  select(rs_coder_c, rs_coder_d)%>%
  mutate(agreement = ifelse(rs_coder_c == rs_coder_d, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/6*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          83.3
rs_coder_c_d <- codings_run2_coder_c_d%>%
  select(rs_coder_c, rs_coder_d)

xtab_rs_cd <- xtabs (~ rs_coder_c_d$rs_coder_c + rs_coder_c_d$rs_coder_d)
  
CohenKappa(xtab_rs_cd)
[1] 0.5714286

Intercoder reliability of coders d and e

codings_run2_coder_d_e <- read_excel("data/codings for reliability/codings_binary_run2_coder_d_e.xlsx")

as: assessment finished

codings_run2_coder_d_e%>%
  select(as_coder_d, as_coder_e)%>%
  mutate(agreement = ifelse(as_coder_d == as_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          95.2
as_coder_d_e <- codings_run2_coder_d_e%>%
  select(as_coder_d, as_coder_e)

xtab_as_de <- xtabs (~ as_coder_d_e$as_coder_d + as_coder_d_e$as_coder_e)
  
CohenKappa(xtab_as_de)
[1] 0.9041096

rn: match individual result to student name

codings_run2_coder_d_e%>%
  select(rn_coder_d, rn_coder_e)%>%
  mutate(agreement = ifelse(rn_coder_d == rn_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          71.4
rn_coder_d_e <- codings_run2_coder_d_e%>%
  select(rn_coder_d, rn_coder_e)

xtab_rn_de <- xtabs (~ rn_coder_d_e$rn_coder_d + rn_coder_d_e$rn_coder_e)
  
CohenKappa(xtab_rn_de)
[1] 0.4166667

me: mean

codings_run2_coder_d_e%>%
  select(me_coder_d, me_coder_e)%>%
  mutate(agreement = ifelse(me_coder_d == me_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          85.7
me_coder_d_e <- codings_run2_coder_d_e%>%
  select(me_coder_d, me_coder_e)

xtab_me_de <- xtabs (~ me_coder_d_e$me_coder_d + me_coder_d_e$me_coder_e)
  
CohenKappa(xtab_me_de)
[1] 0.7096774

bg: best group

codings_run2_coder_d_e%>%
  select(bg_coder_d, bg_coder_e)%>%
  mutate(agreement = ifelse(bg_coder_d == bg_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          90.5
bg_coder_d_e <- codings_run2_coder_d_e%>%
  select(bg_coder_d, bg_coder_e)

xtab_bg_de <- xtabs (~ bg_coder_d_e$bg_coder_d + bg_coder_d_e$bg_coder_e)
  
CohenKappa(xtab_bg_de)
[1] 0.7692308

mg: middle group

codings_run2_coder_d_e%>%
  select(mg_coder_d, mg_coder_e)%>%
  mutate(agreement = ifelse(mg_coder_d == mg_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1           100
mg_coder_d_e <- codings_run2_coder_d_e%>%
  select(mg_coder_d, mg_coder_e)

xtab_mg_de <- xtabs (~ mg_coder_d_e$mg_coder_d + mg_coder_d_e$mg_coder_e)
  
CohenKappa(xtab_mg_de)
[1] 1

wg: weakest group

codings_run2_coder_d_e%>%
  select(wg_coder_d, wg_coder_e)%>%
  mutate(agreement = ifelse(wg_coder_d == wg_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          90.5
wg_coder_d_e <- codings_run2_coder_d_e%>%
  select(wg_coder_d, wg_coder_e)

xtab_wg_de <- xtabs (~ wg_coder_d_e$wg_coder_d + wg_coder_d_e$wg_coder_e)
  
CohenKappa(xtab_wg_de)
[1] 0.7878788

ou: outlier

codings_run2_coder_d_e%>%
  select(ou_coder_d, ou_coder_e)%>%
  mutate(agreement = ifelse(ou_coder_d == ou_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          85.7
ou_coder_d_e <- codings_run2_coder_d_e%>%
  select(ou_coder_d, ou_coder_e)

xtab_ou_de <- xtabs (~ ou_coder_d_e$ou_coder_d + ou_coder_d_e$ou_coder_e)
  
CohenKappa(xtab_ou_de)
[1] 0.4878049

bs: best student

codings_run2_coder_d_e%>%
  select(bs_coder_d, bs_coder_e)%>%
  mutate(agreement = ifelse(bs_coder_d == bs_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          85.7
bs_coder_d_e <- codings_run2_coder_d_e%>%
  select(bs_coder_d, bs_coder_e)

xtab_bs_de <- xtabs (~ bs_coder_d_e$bs_coder_d + bs_coder_d_e$bs_coder_e)
  
CohenKappa(xtab_bs_de)
[1] 0.6896552

ws: weakest student

codings_run2_coder_d_e%>%
  select(ws_coder_d, ws_coder_e)%>%
  mutate(agreement = ifelse(ws_coder_d == ws_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          76.2
ws_coder_d_e <- codings_run2_coder_d_e%>%
  select(ws_coder_d, ws_coder_e)

xtab_ws_de <- xtabs (~ ws_coder_d_e$ws_coder_d + ws_coder_d_e$ws_coder_e)
  
CohenKappa(xtab_ws_de)
[1] 0.5205479

dg: dispersion general

codings_run2_coder_d_e%>%
  select(dg_coder_d, dg_coder_e)%>%
  mutate(agreement = ifelse(dg_coder_d == dg_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          85.7
dg_coder_d_e <- codings_run2_coder_d_e%>%
  select(dg_coder_d, dg_coder_e)

xtab_dg_de <- xtabs (~ dg_coder_d_e$dg_coder_d + dg_coder_d_e$dg_coder_e)
  
CohenKappa(xtab_dg_de)
[1] 0.4878049

ra: range

codings_run2_coder_d_e%>%
  select(ra_coder_d, ra_coder_e)%>%
  mutate(agreement = ifelse(ra_coder_d == ra_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          95.2
ra_coder_d_e <- codings_run2_coder_d_e%>%
  select(ra_coder_d, ra_coder_e)

xtab_ra_de <- xtabs (~ ra_coder_d_e$ra_coder_d + ra_coder_d_e$ra_coder_e)
  
CohenKappa(xtab_ra_de)
[1] 0.8292683

dm: dispersion mean average

codings_run2_coder_d_e%>%
  select(dm_coder_d, dm_coder_e)%>%
  mutate(agreement = ifelse(dm_coder_d == dm_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          85.7
dm_coder_d_e <- codings_run2_coder_d_e%>%
  select(dm_coder_d, dm_coder_e)

xtab_dm_de <- xtabs (~ dm_coder_d_e$dm_coder_d + dm_coder_d_e$dm_coder_e)
  
CohenKappa(xtab_dm_de)
[1] 0.3225806

rs: result individual student

codings_run2_coder_d_e%>%
  select(rs_coder_d, rs_coder_e)%>%
  mutate(agreement = ifelse(rs_coder_d == rs_coder_e, 1, 0))%>%
  summarise(agreement_per = sum(agreement)/21*100)
# A tibble: 1 × 1
  agreement_per
          <dbl>
1          71.4
# BUG FIX: this chunk reports Cohen's kappa for "rs: result individual
# student", but it was copy-pasted from the "rn" chunk and still selected the
# rn_* columns — silently re-reporting rn's kappa (0.4166667) as rs's. It now
# uses the rs_* columns, consistent with the percentage-agreement chunk above.
rs_coder_d_e <- codings_run2_coder_d_e%>%
  select(rs_coder_d, rs_coder_e)

xtab_rs_de <- xtabs (~ rs_coder_d_e$rs_coder_d + rs_coder_d_e$rs_coder_e)

CohenKappa(xtab_rs_de)
[1] 0.4166667

Consensus codings

Main steps of processing

eventdata_processing <- read_excel("data/eventdata_processing.xlsx")

Binary codings of noticing specific aspects

data_aspects <- read_excel("data/data_noticing_aspects.xlsx")

Results

Research Question 1: Processing in general (descriptive analysis and visualization)

Relative durations of main steps

Wrangling: from event data to turn

# Reshape the consensus event codings into second-by-second turn data:
# one row per second and teacher (pid), with one binary indicator column
# per code (same procedure as for the reliability codings above, but
# without a coder dimension).
turndata_processing <- 
eventdata_processing%>%
  # expand every event into the sequence of seconds it covers
  mutate(time = map2(start, end, seq, by = "1 sec"))%>%
  unnest(cols = time)%>%
  select(-start, -end, -turnID)%>%
  group_by(time, pid)%>%
  # collapse codes that overlap within a second into one comma-separated string
  summarise(code = toString(code))%>%
  ungroup()%>%
  # one 0/1 column per code value (prefixed "code_"); fill = 0 marks absent codes
  splitstackshape::cSplit_e("code", sep = ",", type = "character", drop = F, fill = 0)%>%
  as_tibble()%>%
  arrange(pid) 
`summarise()` has grouped output by 'time'. You can override using the
`.groups` argument.

Calculating relative durations of main steps

# Relative duration of each main processing step per teacher, expressed as a
# percentage of that teacher's disturbance-free think-aloud time, in long
# format (pid, var, val) for plotting.
processing_duration <- 
turndata_processing%>%
  # a second counts towards a step only when it is not coded as a disturbance
  mutate(noticing_results = ifelse(code_noticing_results == 1 & code_disturbance == 0, 1, 0), 
         comparing_perspective = ifelse(code_comparing_perspective == 1 & code_disturbance == 0, 1, 0),
         analyzing_errors = ifelse(code_analyzing_errors == 1 & code_disturbance == 0, 1, 0), 
         constructing_implications = ifelse(code_constructing_implications == 1 & code_disturbance == 0, 1, 0),
         think_aloud_clean = ifelse(code_think_aloud == 1 & code_disturbance == 0, 1, 0))%>%
  group_by(pid)%>%
  summarize(think_aloud_duration = sum(think_aloud_clean),
            noticing_results_duration = sum(noticing_results)/think_aloud_duration*100,
            comparing_perspective_duration = sum(comparing_perspective)/think_aloud_duration*100,
            analyzing_errors_duration = sum(analyzing_errors)/think_aloud_duration*100,
            constructing_implications_duration = sum(constructing_implications)/think_aloud_duration*100)%>%
  select(-think_aloud_duration)%>%
  pivot_longer(., names_to = "var", values_to = "val", - pid)%>%
  ungroup()%>%
  # round to one decimal place; replaces the deprecated mutate_at()/funs()
  # idiom that triggered the dplyr deprecation warning
  mutate(val = round(val, 1))
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

Visualizing relative durations of main steps for each teacher

# Figure 1: raincloud-style plot (half-eye density + boxplot + half points)
# of the relative durations of the four main processing steps across teachers.
processing_duration%>%
  ggplot(
    # factor level order fixes the top-to-bottom order after coord_flip()
    aes(x = factor(var, level = c("constructing_implications_duration", "analyzing_errors_duration", "comparing_perspective_duration", "noticing_results_duration")), 
    y = val, fill = var)) + 
  stat_halfeye(adjust = 0.5, 
                       width = 0.5, 
                       .width = 0, 
                       alpha =0.5,
                       justification = -.3, 
                       point_colour = NA,
                       show.legend = FALSE) + 
  # outliers suppressed here because all raw points are drawn below
  geom_boxplot(aes(fill = var), 
                 show.legend = FALSE,
                 alpha = 0.2, 
                 outlier.shape = NA,
                 width = 0.18) + 
    scale_fill_manual(values = 
                         c("noticing_results_duration" = "#481f70",
                           "comparing_perspective_duration" ="#30688e",
                           "analyzing_errors_duration"= "#20a486", 
                           "constructing_implications_duration" = "#8fd744")) +
  # raw per-teacher values, jittered on the left half of each row
  geom_half_point(aes(color = var), 
                            show.legend = FALSE,
                            side = "l", 
                            range_scale = 0.3, 
                            width = 1,
                            alpha = 1,
                            size = 0.5) +
    scale_color_manual(values = 
                         c("noticing_results_duration" = "#481f70",
                           "comparing_perspective_duration" ="#30688e",
                           "analyzing_errors_duration"= "#20a486", 
                           "constructing_implications_duration" = "#8fd744")) +
  coord_flip() +
  ggtitle("") +
    theme_minimal() + 
    theme(axis.title.x = element_text(size = 17),
          axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 17),
          legend.position = "none") + 
     scale_x_discrete(labels = c("Constructing\ninstructional\n implications",
                                 "Analyzing\nerrors", 
                                 "Comparing\nwith\npersonal\nperspective",
                                "Noticing\nresults"
                                )) +
    xlab("") + 
    ylab("Relative durations of main steps, in %") 

ggsave("Figure 1_Relative durations of main steps.png", plot = last_plot(), device = "png", width = 10, height = 7)

Noticing specific aspects

# Figure 2: percentage of teachers who noticed each specific aspect
# (coding run 2), as a bar chart ordered by prevalence.
# NOTE(review): the denominator 47 is hard-coded — presumably the number of
# analyzed think-alouds (one of the 48 participants apparently excluded);
# confirm against the data.
data_aspects%>%
  count(code)%>%
  mutate(Percentage = n/47*100)%>% 
  # map raw data codes (left-hand side, unchanged) to display labels;
  # typo fixed in the labels only: "Disperson" -> "Dispersion"
  mutate(aspect = case_when(code == "assessment_finished" ~ "Assessment finished",
                            code == "result_name" ~ "Match individual result to name", 
                            code == "mean" ~ "Mean", 
                            code == "best_group" ~ "Best group", 
                            code == "middle_group" ~ "Middle group", 
                            code == "weakest_group" ~ "Weakest group", 
                            code == "outlier" ~ "Outlier", 
                            code == "best_student" ~ "Best student", 
                            code == "weakest_student" ~ "Weakest student", 
                            code == "disperson_general" ~ "Dispersion general", 
                            code == "range" ~ "Range", 
                            code == "disperson_mean" ~ "Dispersion mean average", 
                            code == "result_student" ~ "Result individual student"))%>%
  arrange(Percentage)%>%
  ggplot(aes(x = fct_rev(fct_reorder(aspect, Percentage)), y = Percentage, fill = Percentage)) + geom_col() + 
  scale_fill_viridis_c() + 
  theme_minimal(base_size = 20) + 
  scale_x_discrete(labels = label_wrap(20)) + 
    labs(title = "", x = "", y = "Percentage of teachers") + 
  theme(legend.position = "none", 
        axis.text.x = element_text(angle = 40, vjust = 1, hjust=1))

ggsave("Figure 2_Percentage of teachers who noticed specific aspects.png", plot = last_plot(), device = "png", units = c("cm"), width = 33, height = 20)

Visualizing sequences of main steps (codings) per teacher

Wrangling

# Prepare event data for the per-teacher timeline plots (Figures 3/landscape):
# drop disturbances, keep start/end times, and derive human-readable labels
# for each main step plus per-step legend helper columns.
eventdata_processing_sequences <- 
eventdata_processing%>%
  filter(code != "disturbance")%>%
  mutate(start_p = start, 
         end_p = end, 
         # grepl() groups code variants under one display label per main step
         code_ungrouped = case_when(grepl("noticing_results", code) ~ "Noticing results",
                                    grepl("comparing_perspective", code) ~ "Comparing with personal perspective",
                                    grepl("analyzing_errors", code) ~ "Analyzing errors",
                                    grepl("constructing_implications", code) ~ "Constructing instructional implications",
                                    grepl("think_aloud", code) ~ "Think-aloud duration",
                                    TRUE ~ "other"),
         # exact-match labels (NA for code variants); used for the legend
         code_legend = case_when(code == "noticing_results" ~ "Noticing results",
                                  code == "comparing_perspective" ~ "Comparing with personal perspective",
                                  code == "analyzing_errors" ~ "Analyzing errors",
                                  code == "constructing_implications" ~ "Constructing instructional implications",
                                  code == "think_aloud" ~ "Think-aloud duration"),
         # one helper column per step, NA everywhere else
         code_legend_nd = ifelse(code_ungrouped == "Noticing results", code_legend, NA), 
         code_legend_AB = ifelse(code_ungrouped == "Comparing with personal perspective", code_legend, NA),
         code_legend_ae = ifelse(code_ungrouped == "Analyzing errors", code_legend, NA),
         code_legend_ci = ifelse(code_ungrouped == "Constructing instructional implications", code_legend, NA), 
         code_legend_ta = ifelse(code_ungrouped == "Think-aloud duration", code_legend, NA))

Visualization

  # Figure 3 (portrait): per-teacher timelines of the main processing steps.
  # Each step is drawn as its own geom_segment() layer (one horizontal bar
  # per coded event, from start_p to end_p), faceted by teacher (pid).
  ggplot(eventdata_processing_sequences, aes(y = code_ungrouped)) + 
  geom_segment(data = eventdata_processing_sequences %>%
                 filter(code_ungrouped == "Think-aloud duration")%>%
                 mutate("Think-aloud duration" = code_legend),
               aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Think-aloud duration"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
                 filter(code_ungrouped == "Noticing results")%>%
                 mutate("Noticing results" = code_legend),
               aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Noticing results"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Comparing with personal perspective")%>%
               mutate("Comparing with personal perspective" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Comparing with personal perspective"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Analyzing errors")%>%
               mutate("Analyzing errors" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Analyzing errors"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Constructing instructional implications")%>%
               mutate("Constructing instructional implications" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Constructing instructional implications"), lineend = "butt", linewidth = 4) +
  guides(color=guide_legend(title="Main steps")) + 
 facet_wrap(~`pid`, ncol = 3) + 
  # same step colors as Figure 1; breaks fix the legend order
  scale_colour_manual(values = c("Noticing results" = "#481f70",
                                 "Comparing with personal perspective" = "#30688e",
                                 "Analyzing errors" = "#20a486",
                                 "Constructing instructional implications" = "#8fd744",
                                 "Think-aloud duration" = "darkgrey"),
                      breaks = c("Noticing results", "Comparing with personal perspective", "Analyzing errors", "Constructing instructional implications", "Think-aloud duration")) + 
  labs(title = "") + 
  # limits fix the top-to-bottom order of the step rows within each facet
  scale_y_discrete(limits = c("Think-aloud duration", "Constructing instructional implications", "Analyzing errors", "Comparing with personal perspective", "Noticing results")) +
  xlab("Time (in min)") + 
  ylab("") +
  theme_minimal() + 
   theme(legend.position = "bottom", legend.justification = "center", legend.direction = "horizontal", legend.title = element_text(size = 10), 
  legend.text = element_text(size = 10),
  plot.title = element_text(hjust = 0, face = "bold", size = 20),
   axis.text = element_text(size = 10),
        axis.title=element_text(size = 10),
  strip.text.x = element_text(size = 10)) +
   scale_fill_discrete(breaks = c("Noticing results", "Comparing with personal perspective", "Analyzing errors", "Constructing instructional implications", "Think-aloud duration")) 

ggsave("Figure 3_Sequences of main steps per teacher_portrait.png", plot = last_plot(), device = "png", units = c("cm"), width = 34, height = 48, dpi = 300)

Landscape

  ggplot(eventdata_processing_sequences, aes(y = code_ungrouped)) + 
  geom_segment(data = eventdata_processing_sequences %>%
                 filter(code_ungrouped == "Think-aloud duration")%>%
                 mutate("Think-aloud duration" = code_legend),
               aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Think-aloud duration"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
                 filter(code_ungrouped == "Noticing results")%>%
                 mutate("Noticing results" = code_legend),
               aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Noticing results"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Comparing with personal perspective")%>%
               mutate("Comparing with personal perspective" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Comparing with personal perspective"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Analyzing errors")%>%
               mutate("Analyzing errors" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Analyzing errors"), lineend = "butt", linewidth = 4) +
  geom_segment(data = eventdata_processing_sequences %>%
               filter(code_ungrouped == "Constructing instructional implications")%>%
               mutate("Constructing instructional implications" = code_legend),
             aes(x = start_p, xend = end_p, yend = code_ungrouped, color = "Constructing instructional implications"), lineend = "butt", linewidth = 4) +
  guides(color=guide_legend(title="Main steps")) + 
 facet_wrap(~`pid`, ncol = 4) + 
  scale_colour_manual(values = c("Noticing results" = "#481f70",
                                 "Comparing with personal perspective" = "#30688e",
                                 "Analyzing errors" = "#20a486",
                                 "Constructing instructional implications" = "#8fd744",
                                 "Think-aloud duration" = "darkgrey"),
                      breaks = c("Noticing results", "Comparing with personal perspective", "Analyzing errors", "Constructing instructional implications", "Think-aloud duration")) + 
  labs(title = "") + 
  scale_y_discrete(limits = c("Think-aloud duration", "Constructing instructional implications", "Analyzing errors", "Comparing with personal perspective", "Noticing results")) +
  xlab("Time (in min)") + 
  ylab("") +
  theme_minimal() + 
   theme(legend.position = "bottom", legend.justification = "center", legend.direction = "horizontal", legend.title = element_text(size = 16), 
  legend.text = element_text(size = 16), 
  plot.title = element_text(hjust = 0, face = "bold", size = 20),
   axis.text = element_text(size = 16),
        axis.title=element_text(size = 16),
  strip.text.x = element_text(size = 16)) +
   scale_fill_discrete(breaks = c("Noticing results", "Comparing with personal perspective", "Analyzing errors", "Constructing instructional implications", "Think-aloud duration")) 

ggsave("Figure 3_Sequences of main steps per teacher_landscape.png", plot = last_plot(), device = "png", units = c("cm"), width = 80, height = 48, dpi = 300)

Co-occurrences of different main processing steps (Appendix)

# Appendix Figure B.1: for each teacher, the percentage of disturbance-free
# think-aloud seconds in which two main steps were coded simultaneously
# (co-occurrence), shown as boxplots with half-side raw points.
turndata_processing %>%
  mutate(
    noticing_analyzing = ifelse(code_noticing_results == 1 & code_analyzing_errors == 1 & code_disturbance == 0, 1, 0),
    noticing_comparing = ifelse(code_noticing_results == 1 & code_comparing_perspective == 1 & code_disturbance == 0, 1, 0),
    noticing_constructing = ifelse(code_noticing_results == 1 & code_constructing_implications == 1 & code_disturbance == 0, 1, 0),
    analyzing_comparing = ifelse(code_analyzing_errors == 1 & code_comparing_perspective == 1 & code_disturbance == 0, 1, 0),
    analyzing_constructing = ifelse(code_analyzing_errors == 1 & code_constructing_implications == 1 & code_disturbance == 0, 1, 0),
    comparing_constructing = ifelse(code_comparing_perspective == 1 & code_constructing_implications == 1 & code_disturbance == 0, 1, 0),
    think_aloud_clean = ifelse(code_think_aloud == 1 & code_disturbance == 0, 1, 0)
  ) %>%
  group_by(pid) %>%
  summarize(
    # Denominator: disturbance-free think-aloud seconds per teacher.
    think_aloud_duration = sum(think_aloud_clean),
    noticing_analyzing_duration = sum(noticing_analyzing) / think_aloud_duration * 100,
    noticing_comparing_duration = sum(noticing_comparing) / think_aloud_duration * 100,
    noticing_constructing_duration = sum(noticing_constructing) / think_aloud_duration * 100,
    analyzing_comparing_duration = sum(analyzing_comparing) / think_aloud_duration * 100,
    analyzing_constructing_duration = sum(analyzing_constructing) / think_aloud_duration * 100,
    comparing_constructing_duration = sum(comparing_constructing) / think_aloud_duration * 100
  ) %>%
  select(-think_aloud_duration) %>%
  pivot_longer(names_to = "var", values_to = "val", -pid) %>%
  ungroup() %>%
  mutate(across(val, ~ round(.x, 1))) %>% # replaces deprecated mutate_at()/funs()
  ggplot(aes(x = factor(var, levels = c("noticing_analyzing_duration", "noticing_comparing_duration", "noticing_constructing_duration", "analyzing_comparing_duration", "analyzing_constructing_duration", "comparing_constructing_duration")), y = val)) +
  geom_boxplot(show.legend = FALSE, alpha = 0.2, outlier.shape = NA, width = 0.25) +
  geom_half_point(show.legend = FALSE, side = "l", range_scale = 0.3, width = 1, alpha = 1, size = 0.5, shape = 1) +
  # The original called coord_cartesian(ylim = c(0, 35)) and then coord_flip(),
  # which silently replaced it (see the warning in the rendered output), so the
  # intended limit was never applied. coord_flip(ylim = ...) applies it correctly.
  coord_flip(ylim = c(0, 35)) +
  theme_minimal() +
  scale_x_discrete(labels = c("Noticing results/\nAnalyzing errors",
                              "Noticing results/\nComparing with personal perspective",
                              "Noticing results/\nConstructing instructional implications",
                              "Analyzing errors/\nComparing with personal perspective",
                              "Analyzing errors/\nConstructing instructional implications",
                              "Comparing with personal perspective/\nConstructing instructional implications")) +
  labs(title = "", x = "", y = "Percentage") +
  theme(axis.text.y = element_text(size = 15),
        axis.text.x = element_text(size = 15),
        title = element_text(size = 15))
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.

ggsave("Figure B.1_Co-occurences of processing steps.png", plot = last_plot(), device = "png", units = c("cm"), width = 33, height = 20)

Relative durations of addressing different graph types (Appendix)

eventdata_graphs <- read_excel("data/eventdata_graphs.xlsx")

Wrangling

# Expand each coded interval into one row per second, then collapse codes that
# overlap in the same second into indicator columns (one column per code).
turndata_graphs <- eventdata_graphs %>%
  mutate(time = map2(start, end, seq, by = "1 sec")) %>% # one timestamp per second within each interval
  unnest(cols = time) %>%
  select(-start, -end, -turnID) %>%
  group_by(time, pid) %>%
  # Concatenate all codes active in the same second; .groups = "drop" makes the
  # ungrouping explicit (the original relied on the default and emitted a message).
  summarise(code = toString(code), .groups = "drop") %>%
  # Split the comma-separated codes into one binary indicator column per code.
  splitstackshape::cSplit_e("code", sep = ",", type = "character", drop = FALSE, fill = 0) %>%
  as_tibble() %>%
  arrange(pid)
`summarise()` has grouped output by 'time'. You can override using the
`.groups` argument.

Visualization

# Appendix Figure B.2: per teacher, the share of disturbance-free think-aloud
# time spent addressing each of the five dashboard graph types, as boxplots
# with half-side raw points. Colors are defined once and shared by the fill
# and color scales.
graphtype_colors <- c("graphtype_1" = "#462268",
                      "graphtype_2" = "#29788d",
                      "graphtype_3" = "#22a784",
                      "graphtype_4" = "#6dc95d",
                      "graphtype_5" = "#cddf3c")

turndata_graphs %>%
  filter(code_think_aloud == 1) %>%
  mutate(graphtype_1 = ifelse(code_graphtype_1 == 1 & code_disturbance == 0, 1, 0),
         graphtype_2 = ifelse(code_graphtype_2 == 1 & code_disturbance == 0, 1, 0),
         graphtype_3 = ifelse(code_graphtype_3 == 1 & code_disturbance == 0, 1, 0),
         graphtype_4 = ifelse(code_graphtype_4 == 1 & code_disturbance == 0, 1, 0),
         graphtype_5 = ifelse(code_graphtype_5 == 1 & code_disturbance == 0, 1, 0),
         code_think_aloud_clean = ifelse(code_think_aloud == 1 & code_disturbance == 0, 1, 0)) %>%
  group_by(pid) %>%
  summarize(think_aloud_clean = sum(code_think_aloud_clean),
            graphtype_1 = sum(graphtype_1) / think_aloud_clean * 100,
            graphtype_2 = sum(graphtype_2) / think_aloud_clean * 100,
            graphtype_3 = sum(graphtype_3) / think_aloud_clean * 100,
            graphtype_4 = sum(graphtype_4) / think_aloud_clean * 100,
            graphtype_5 = sum(graphtype_5) / think_aloud_clean * 100) %>%
  select(-think_aloud_clean) %>%
  pivot_longer(names_to = "var", values_to = "val", -pid) %>%
  ungroup() %>%
  # Reverse order so graph 1 ends up at the top after coord_flip().
  mutate(var = factor(var, levels = c("graphtype_5", "graphtype_4", "graphtype_3", "graphtype_2", "graphtype_1"))) %>%
  ggplot(aes(x = var, y = val, fill = var)) +
  geom_boxplot(show.legend = FALSE,
               alpha = 0.2,
               outlier.shape = NA,
               width = 0.25) +
  scale_fill_manual(values = graphtype_colors) +
  geom_half_point(aes(color = var),
                  show.legend = FALSE,
                  side = "l",
                  range_scale = 0.3,
                  width = 1,
                  alpha = 1,
                  size = 0.5,
                  shape = 1) +
  scale_color_manual(values = graphtype_colors) +
  coord_flip() +
  scale_x_discrete(labels = c("Graph 5:\nOverview of\ncurriculum topics",
                              "Graph 4:\nCompetence levels",
                              "Graph 3:\nTasks and task answers of\nindividual students",
                              "Graph 2:\nClass overview",
                              "Graph 1:\nLearning progress on\nclass or individual level")) +
  ggtitle("") +
  theme_minimal() +
  theme(axis.title.x = element_text(size = 17),
        axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 17),
        legend.position = "none") +
  xlab("") +
  ylab("Relative durations of addressing graph types, in %")

ggsave("Figure B.2_Relative durations of adressing graph types.png", plot = last_plot(), device = "png", units = c("cm"), width = 33, height = 20)

Research Question 2: Different groups of teachers (clustering)

Clustering

Wrangling noticing aspects data frame

# Per-teacher counts of each noticed aspect, reshaped wide (one column per code).
# Teachers without any consensus-coded aspects are appended and their NAs set to 0.
data_aspects_for_clustering <-
  data_aspects %>%
  group_by(pid) %>%
  count(code) %>%
  pivot_wider(names_from = code, values_from = n) %>%
  ungroup() %>%
  # In consensus, no codes were coded for Tom, Miranda, James and Bill,
  # so all-zero rows are added for them here.
  add_row(pid = "Tom") %>%
  add_row(pid = "Miranda") %>%
  add_row(pid = "James") %>%
  add_row(pid = "Bill") %>%
  # across() replaces the deprecated mutate_all()/funs() construct.
  mutate(across(everything(), ~ ifelse(is.na(.x), 0, .x)))
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

Wrangling relative durations data frame

# Relative main-step durations per teacher, reshaped wide and standardized
# (z-scores) so that duration variables are comparable in the clustering.
processing_duration_for_clustering <-
  processing_duration %>%
  pivot_wider(names_from = var, values_from = val) %>%
  filter(pid != "Clarence") %>% # excluded due to missing values
  # across() replaces the deprecated mutate_at()/funs(); scale() returns a
  # one-column matrix, so as.numeric() flattens it back to a plain vector.
  mutate(across(2:5, ~ as.numeric(scale(.x))))
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
# Combine standardized durations and noticed-aspect indicators into one
# clustering input table; pid is only needed for the join and dropped afterwards.
data_clustering <-
  processing_duration_for_clustering %>%
  full_join(data_aspects_for_clustering, by = "pid") %>%
  select(-pid)

Computing the dissimilarity matrix using Gower's coefficient

# Gower dissimilarity for mixed data: columns 5-17 (the noticing indicator
# variables) are treated as symmetric binaries, all other columns as numeric.
diss_gower <- daisy(data_clustering,
                    metric = "gower",
                    type = list(symm = 5:17))

Clustering using complete linkage algorithm

# Agglomerative hierarchical clustering with complete linkage.
# NOTE(review): diss = FALSE tells agnes() to treat its input as raw
# observations, yet diss_gower is a daisy() dissimilarity object — verify that
# diss = TRUE (or the default) was not intended, since this choice changes what
# the algorithm clusters.
cluster_complete <- agnes(diss_gower, diss = FALSE, method = "complete")

print(cluster_complete)
Call:    agnes(x = diss_gower, diss = FALSE, method = "complete") 
Agglomerative coefficient:  0.6981957 
Order of objects:
 [1] 1  5  26 18 39 30 4  6  37 23 3  20 11 27 45 10 43 13 14 34 28 33 19 21 16
[26] 29 24 32 36 35 42 2  47 12 15 44 31 7  22 38 9  17 41 40 8  25 46
Height (summary):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1570  0.5019  0.6904  0.7728  0.9712  1.8419 

Available components:
[1] "order"     "height"    "ac"        "merge"     "diss"      "call"     
[7] "method"    "order.lab" "data"     
plot(cluster_complete) # banner and dendrogram of the complete-linkage solution

cutree(cluster_complete, k = 3) # Partition: assign each teacher to one of three clusters
 [1] 1 2 1 1 1 1 2 3 2 1 1 2 1 1 2 1 2 1 1 1 1 2 1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 2
[39] 1 2 2 1 1 2 1 3 2

Computing heatmap for visualization clustering with complete linkage algorithm

# Diverging color scale for the heatmap: green (high) via yellow to purple (low).
col_fun1 = colorRamp2(c(2, 0, -2), c("#55C667FF", "#FDE725FF", "#481567FF")) # color as function

png("heatmap_gower_complete linkage.png", width=1600, height=1000, bg = "transparent") 

# Heatmap of the clustering input with rows ordered by the complete-linkage
# dendrogram. Reuses the cluster_complete object computed above instead of
# re-running the identical agnes() call.
h1 = Heatmap(data_clustering, name = "Legend",
             col = col_fun1,
             cluster_rows = cluster_complete, # identical to agnes(diss_gower, diss = FALSE, method = "complete")
             column_title = "",
             show_column_dend = TRUE,
             row_names_gp = gpar(fontsize = 18),
             column_names_gp = gpar(fontsize = 18),
             width = unit(25, "cm"),
             row_dend_width = unit(85, "mm"),
             column_dend_height = unit(35, "mm"),
             column_names_side = "bottom",
             column_names_rot = 45,
             heatmap_legend_param = list(title = ""))
Warning: The input is a data frame-like object, convert it to a matrix.
draw(h1, heatmap_legend_side = "bottom") # render the heatmap onto the open png device

 dev.off() # close the device so the png file is written
quartz_off_screen 
                2 

Clustering using average linkage algorithm

# Agglomerative hierarchical clustering with average linkage, for comparison
# with the complete-linkage solution.
# NOTE(review): diss = FALSE treats the daisy() dissimilarity object as raw
# observations — confirm this is intended (diss = TRUE / the default is typical
# when passing a precomputed dissimilarity).
cluster_average <- agnes(diss_gower, diss = FALSE, method = "average")

print(cluster_average) # overall result: agglomerative coefficient, order, heights
Call:    agnes(x = diss_gower, diss = FALSE, method = "average") 
Agglomerative coefficient:  0.5543031 
Order of objects:
 [1] 1  4  6  23 37 3  20 11 27 45 5  26 18 39 10 43 13 30 14 34 28 19 33 21 35
[26] 42 16 29 24 32 36 8  9  2  47 25 12 15 44 31 22 38 7  17 41 40 46
Height (summary):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.1570  0.4954  0.6506  0.6813  0.8250  1.2404 

Available components:
[1] "order"     "height"    "ac"        "merge"     "diss"      "call"     
[7] "method"    "order.lab" "data"     
plot(cluster_average) # Dendrogram and banner plot of the average-linkage solution

cutree(cluster_average, k = 3) # Partition: three-cluster assignment per teacher
 [1] 1 2 1 1 1 1 2 3 3 1 1 2 1 1 2 1 2 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 2
[39] 1 2 2 1 1 2 1 2 2

Computing heatmap for visualization clustering with average linkage algorithm

# Four-point color scale separating the binary "noticed" indicators (0/1) from
# the standardized duration variables (negative/positive z-scores).
col_fun2 = colorRamp2(c(2, 1, 0, -2), c("#35b779", "#addc30", "#f6f6f6", "#481567FF"))
png("Figure 4_Visualization clustering_heatmap_gower_average_raw.png", width=1800, height=1200) 

# Figure 4: heatmap with rows ordered by the average-linkage dendrogram.
# Reuses the cluster_average object computed above instead of re-running the
# identical agnes() call; the misspelled annotation labels "Disperson" were
# corrected to "Dispersion".
h2 = Heatmap(data_clustering, name = "Legend",
             col = col_fun2,
             cluster_rows = cluster_average, # identical to agnes(diss_gower, diss = FALSE, method = "average")
             column_title = "",
             show_column_dend = TRUE,
             show_column_names = FALSE,
             row_names_gp = gpar(fontsize = 18),
             column_names_gp = gpar(fontsize = 18),
             width = unit(25, "cm"),
             row_dend_width = unit(85, "mm"),
             column_dend_height = unit(35, "mm"),
             column_names_side = "bottom",
             column_names_rot = 45,
             heatmap_legend_param = list(title = "",
                                         at = c(-2, 0, 1, 2),
                                         labels = c("low duration of main step", "aspect is not noticed", "aspect is noticed", "high duration of main step"),
                                         labels_gp = gpar(fontsize = 18),
                                         direction = "vertical",
                                         labels_rot = 45),
             # Readable column labels; order must match the column order of data_clustering.
             bottom_annotation = columnAnnotation(var = anno_text(c("Noticing", "Comparing", "Analyzing", "Constructing", "Middle group", "Result - name", "Weakest group", "Assessm. finished", "Best group", "Mean", 
                 "Result student", "Outlier", "Weakest student", "Dispersion (gen.)", "Range", "Best student", "Dispersion (mean)"), gp = gpar(fontsize = 20), rot = 50)))
Warning: The input is a data frame-like object, convert it to a matrix.
draw(h2, heatmap_legend_side = "right") # render the heatmap onto the open png device

 dev.off() # close the device so the png file is written
quartz_off_screen 
                2 

Comparing partitions of different algorithms for robust results

Comparing partitions of the three-group solution (groups A, B, and C in Figure 4 in the manuscript)

# Agreement between the average- and complete-linkage partitions, quantified
# with the adjusted Rand index (1 = identical partitions, 0 = chance level).
# NOTE(review): both partitions are hard-coded rather than derived from the
# cutree() output above — confirm they match the dendrogram-based assignments.
adjustedRandIndex(new("Partition", 
                      c(1, 3, 1, 1, 1, 1, 3, 4, 4, 1, 
                        1, 3, 1, 2, 3, 2, 3, 1, 2, 1,
                        2, 3, 1, 2, 3, 1, 1, 2, 2, 1, 
                        3, 2, 2, 2, 2, 2, 1, 3, 1, 3, 
                        3, 2, 1, 3, 1, 3, 3)), # average linkage; "outliers" according to the dendrogram are treated as a fourth group 
                  new("Partition", 
                      c(1, 3, 1, 1, 1, 1, 3, 3, 3, 1, 
                        1, 3, 1, 2, 3, 2, 3, 1, 2, 1, 
                        2, 3, 1, 2, 3, 1, 1, 2, 2, 1, 
                        3, 2, 2, 2, 2, 2, 1, 3, 1, 3, 
                        3, 2, 1, 3, 1, 3, 3))) # complete linkage
[1] 0.9396869

Research Question 3: Main process (process mining)

Eventlog object

# Build a bupaR event log of the four main processing steps: each coded
# interval becomes one activity instance with start/complete timestamps. A
# constant dummy resource satisfies activitylog()'s required interface.
eventlog <- eventdata_processing%>%
  filter(code %in% c("noticing_results", "comparing_perspective", "analyzing_errors", "constructing_implications"))%>%
  rename(complete = end)%>%
  # NOTE(review): the rendered output reports "2 failed to parse" here — two
  # start/end values do not match ymd_hms and become NA; verify the raw data.
  convert_timestamps(columns = c("start", "complete"), format = ymd_hms)%>%
  mutate(dummy = as.character("a"))%>% 
  activitylog(case_id = "pid",
                activity_id = "code",
                timestamps = c("start", "complete"), 
               resource_id = "dummy")%>% 
  to_eventlog()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `start = (function (..., quiet = FALSE, tz = "UTC", locale =
  Sys.getlocale("LC_TIME"), ...`.
Caused by warning:
!  2 failed to parse.

Computing dependency matrix and visualization in dependency graph with low thresholds

Dependency matrix

# Heuristics-miner dependency matrix with every threshold at zero, so all
# observed directly-follows relations are retained.
dep_matrix <- dependency_matrix(
  eventlog,
  dependency_type = dependency_type_fhm(
    threshold_dependency = 0,   # dependency measure
    threshold_l1 = 0,           # length-one loops
    threshold_l2 = 0,           # length-two loops
    threshold_frequency = 0,    # frequency
    all_connected = TRUE,       # connect best antecedent/consequent regardless of threshold
    endpoints_connected = TRUE  # always connect Start/End activities as antecedent/consequent
  )
)

print(dep_matrix)
                           consequent
antecedent                  analyzing_errors comparing_perspective
  analyzing_errors                0.95000000            0.00000000
  comparing_perspective           0.10256410            0.85714286
  constructing_implications       0.00000000            0.00000000
  End                             0.00000000            0.00000000
  noticing_results                0.08518519            0.03433476
  Start                           0.00000000            0.80000000
                           consequent
antecedent                  constructing_implications       End
  analyzing_errors                          0.2117647 0.9000000
  comparing_perspective                     0.1250000 0.8333333
  constructing_implications                 0.9285714 0.9090909
  End                                       0.0000000 0.0000000
  noticing_results                          0.0000000 0.9600000
  Start                                     0.7500000 0.0000000
                           consequent
antecedent                  noticing_results Start
  analyzing_errors                0.00000000     0
  comparing_perspective           0.00000000     0
  constructing_implications       0.08588957     0
  End                             0.00000000     0
  noticing_results                0.99700599     0
  Start                           0.97619048     0
attr(,"class")
[1] "dependency_matrix" "matrix"            "array"            
# Visualize the unthresholded dependency matrix as a dependency graph.
render_dependency_matrix(dep_matrix, render = TRUE)

Calculating frequencies and visualization in directly follows graph

# Causal net (directly-follows graph) without pruning: both thresholds at zero
# keep every arc; bindings are derived from the event log via nearest matching.
dfg <- causal_net(
  dependencies = dep_matrix,
  bindings = causal_bindings(eventlog, dep_matrix, "nearest"),
  threshold = 0,
  threshold_frequency = 0
)

render_causal_net(dfg, render = TRUE)

Computing dependency matrix, visualization in dependency graph and frequencies with higher thresholds on dependency measure (0.5 and default)

Dependency matrix, 0.5 threshold

# Same dependency matrix, but with the dependency-measure threshold raised to
# 0.5; all other thresholds remain at zero.
dep_matrix_05 <- dependency_matrix(
  eventlog,
  dependency_type = dependency_type_fhm(
    threshold_dependency = 0.5, # dependency measure
    threshold_l1 = 0,           # length-one loops
    threshold_l2 = 0,           # length-two loops
    threshold_frequency = 0,    # frequency
    all_connected = TRUE,       # connect best antecedent/consequent regardless of threshold
    endpoints_connected = TRUE  # always connect Start/End activities as antecedent/consequent
  )
)

print(dep_matrix_05)
                           consequent
antecedent                  analyzing_errors comparing_perspective
  analyzing_errors                 0.0000000             0.0000000
  comparing_perspective            0.1025641             0.8571429
  constructing_implications        0.0000000             0.0000000
  End                              0.0000000             0.0000000
  noticing_results                 0.0000000             0.0000000
  Start                            0.0000000             0.8000000
                           consequent
antecedent                  constructing_implications       End
  analyzing_errors                          0.0000000 0.9000000
  comparing_perspective                     0.0000000 0.8333333
  constructing_implications                 0.9285714 0.9090909
  End                                       0.0000000 0.0000000
  noticing_results                          0.0000000 0.9600000
  Start                                     0.7500000 0.0000000
                           consequent
antecedent                  noticing_results Start
  analyzing_errors                 0.0000000     0
  comparing_perspective            0.0000000     0
  constructing_implications        0.0000000     0
  End                              0.0000000     0
  noticing_results                 0.9970060     0
  Start                            0.9761905     0
attr(,"class")
[1] "dependency_matrix" "matrix"            "array"            
# Dependency graph for the 0.5-threshold matrix.
render_dependency_matrix(dep_matrix_05, render = TRUE)

Directly follows graph, 0.5 threshold

# Causal net for the 0.5-threshold dependency matrix (default pruning arguments).
dfg_05 <- causal_net(
  dependencies = dep_matrix_05,
  bindings = causal_bindings(eventlog, dep_matrix_05, "nearest")
)

render_causal_net(dfg_05, render = TRUE)

Dependency matrix, default

# Same dependency matrix at the heuristics miner's default dependency threshold
# (0.9); all other thresholds remain at zero.
dep_matrix_default <- dependency_matrix(
  eventlog,
  dependency_type = dependency_type_fhm(
    threshold_dependency = 0.9, # dependency measure (default)
    threshold_l1 = 0,           # length-one loops
    threshold_l2 = 0,           # length-two loops
    threshold_frequency = 0,    # frequency
    all_connected = TRUE,       # connect best antecedent/consequent regardless of threshold
    endpoints_connected = TRUE  # always connect Start/End activities as antecedent/consequent
  )
)

print(dep_matrix_default)
                           consequent
antecedent                  analyzing_errors comparing_perspective
  analyzing_errors                 0.0000000                   0.0
  comparing_perspective            0.1025641                   0.0
  constructing_implications        0.0000000                   0.0
  End                              0.0000000                   0.0
  noticing_results                 0.0000000                   0.0
  Start                            0.0000000                   0.8
                           consequent
antecedent                  constructing_implications       End
  analyzing_errors                               0.00 0.9000000
  comparing_perspective                          0.00 0.8333333
  constructing_implications                      0.00 0.9090909
  End                                            0.00 0.0000000
  noticing_results                               0.00 0.9600000
  Start                                          0.75 0.0000000
                           consequent
antecedent                  noticing_results Start
  analyzing_errors                 0.0000000     0
  comparing_perspective            0.0000000     0
  constructing_implications        0.0000000     0
  End                              0.0000000     0
  noticing_results                 0.9970060     0
  Start                            0.9761905     0
attr(,"class")
[1] "dependency_matrix" "matrix"            "array"            
# Dependency graph for the default-threshold matrix.
render_dependency_matrix(dep_matrix_default, render = TRUE)

Directly follows graph, default

# Causal net for the default-threshold dependency matrix.
dfg_default <- causal_net(
  dependencies = dep_matrix_default,
  bindings = causal_bindings(eventlog, dep_matrix_default, "nearest")
)

render_causal_net(dfg_default, render = TRUE)